{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3.1 Tenosr\n",
    "## 基本Tensor操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch as t"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[0., 0., 0.],\n",
      "        [0., 0., 0.]])\n",
      "tensor([[1., 2., 3.],\n",
      "        [4., 5., 6.]])\n",
      "[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]\n"
     ]
    }
   ],
   "source": [
    "a=t.Tensor(2,3)   # 指定形状Tensor\n",
    "b=t.Tensor([[1,2,3],[4,5,6]])   # list→Tensor\n",
    "c=b.tolist()    #Tensor→list\n",
    "print(a)\n",
    "print(b)\n",
    "print(c)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([2, 3])\n",
      "torch.Size([2, 3])\n",
      "6\n",
      "6\n"
     ]
    }
   ],
   "source": [
    "b_size=b.size()\n",
    "print(b_size)   #Tensor.size()返回Torch.size()对象\n",
    "print(b.shape)  #tensor.shape可以直接达到和tensor.size()相同的效果，但它不是方法，不用加括号\n",
    "print(b.numel())\n",
    "print(b.nelement())   #numel() 和nelement()作用相同"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[0.0000e+00, 0.0000e+00, 8.4078e-45],\n",
      "        [0.0000e+00, 1.4013e-45, 0.0000e+00]])\n",
      "tensor([2., 3.])\n"
     ]
    }
   ],
   "source": [
    "c=t.Tensor(b_size)   #既然b_size为 Torch.size()对象，则可以用做确定Tensor大小的参数\n",
    "d=t.Tensor((2,3))    #注意和a=t.Tensor(2,3)区别\n",
    "print(c)\n",
    "print(d)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 其他创建Tensor的方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[1., 1., 1.],\n",
      "        [1., 1., 1.]])\n",
      "tensor([[0., 0., 0.],\n",
      "        [0., 0., 0.]])\n",
      "tensor([1, 3, 5, 7])\n",
      "tensor([ 1.0000,  5.5000, 10.0000])\n",
      "tensor([[-0.3180,  0.3229, -0.7160],\n",
      "        [-1.2699, -1.8162, -0.1665]])\n",
      "tensor([[0.7685, 0.0837, 0.2983],\n",
      "        [0.5592, 0.6535, 0.6092]])\n",
      "tensor([1, 3, 4, 0, 2])\n",
      "tensor([[1., 0., 0.],\n",
      "        [0., 1., 0.]])\n"
     ]
    }
   ],
   "source": [
    "print(t.ones(2,3))\n",
    "print(t.zeros(2,3))\n",
    "print(t.arange(1,8,2))   #从1到8，每次步长为2\n",
    "print(t.linspace(1,10,3))#1到10，分为3部分\n",
    "print(t.randn(2,3))  #标准正态分布\n",
    "print(t.rand(2,3))   #均匀分布\n",
    "print(t.randperm(5))  #长度为5的随机排列\n",
    "print(t.eye(2,3))  #对角线为1，不要求行列一致"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 常用Tensor操作\n",
    "通过tensor.view()方法可以调整tensor的形状，比如将1行6列的数据调整为2行三列，但view操作不会改变计算机存储数据的方式，只是输出时的读取方式不同，view之后的新tensor与原tensor共享统一内存"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([0, 1, 2, 3, 4, 5])\n",
      "tensor([[0, 1, 2],\n",
      "        [3, 4, 5]])\n",
      "tensor([[0, 1, 2],\n",
      "        [3, 4, 5]])\n"
     ]
    }
   ],
   "source": [
    "a=t.arange(0,6)\n",
    "print(a)\n",
    "a=a.view(2,3)\n",
    "print(a)\n",
    "b=a.view(-1,3)  #-1表示按另一维度自动计算大小\n",
    "print(b)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "unsqueeze()和squeeze()用于改变数据的维度"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[[0, 1, 2],\n",
      "         [3, 4, 5]]])\n",
      "\n",
      " tensor([[[0, 1, 2]],\n",
      "\n",
      "        [[3, 4, 5]]])\n",
      "\n",
      " tensor([[[0],\n",
      "         [1],\n",
      "         [2]],\n",
      "\n",
      "        [[3],\n",
      "         [4],\n",
      "         [5]]])\n"
     ]
    }
   ],
   "source": [
    "print(b.unsqueeze(0))    #维度为1*2*3\n",
    "print('\\n',b.unsqueeze(1))    #维度为2*1*3\n",
    "print('\\n',b.unsqueeze(2))    #维度为2*3*1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[[[[0, 1, 2],\n",
      "           [3, 4, 5]]]]])\n"
     ]
    }
   ],
   "source": [
    "c=b.view(1,1,1,2,3)\n",
    "print(c)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[[[0, 1, 2],\n",
      "          [3, 4, 5]]]])\n",
      "tensor([[0, 1, 2],\n",
      "        [3, 4, 5]])\n",
      "tensor([[0, 1, 2],\n",
      "        [3, 4, 5]])\n"
     ]
    }
   ],
   "source": [
    "print(c.squeeze(0))  #压缩0维\n",
    "d=c\n",
    "for i in range(100):  #维度大于1的就无法压缩了\n",
    "    d=d.squeeze(0)\n",
    "print(d)\n",
    "print(c.squeeze()) #将所有维度为1的压缩"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "resize()是另一种用来调整size的方法,但它可以修改tensor的尺寸(不同于view)，即可以自动分配内存空间\n",
    "**从存储的角度讲，对tensor的操作可以分为两类：**\n",
    "- 不会修改自身数据，如a.add(b)，加法的结果返回一个新的tensor\n",
    "- 会修改自身数据，a.add_(b)，加法的结果仍存储在a中\n",
    "因为resize是会修改自身数据的，所以它的形式为：b.resize_()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[0, 1, 2],\n",
      "        [3, 4, 5]])\n",
      "tensor([[0, 1, 2]])\n",
      "tensor([[0, 1, 2]])\n"
     ]
    }
   ],
   "source": [
    "print(b)\n",
    "print(b.resize_(1,3))\n",
    "print(b) #此时b已经改变"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[                0,                 1,                 2],\n",
      "        [                3,                 4,                 5],\n",
      "        [31244220637118579, 25896144540467314, 31807067512504430]])\n"
     ]
    }
   ],
   "source": [
    "print(b.resize_(3,3))  #如果没有其他操作覆盖这一块区域，原来的数据是会保留的，但多出来的数据会分配存储空间"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 索引操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[-0.9328, -0.5227,  1.3218, -1.2532],\n",
      "        [-1.5375, -1.4575, -1.2773,  0.1476],\n",
      "        [ 0.8385, -0.8500,  0.9312,  1.6920]])\n",
      "torch.Size([3, 4])\n",
      "tensor([-0.9328, -0.5227,  1.3218, -1.2532])\n",
      "tensor([-0.9328, -0.5227,  1.3218, -1.2532])\n",
      "tensor([-0.9328, -1.5375,  0.8385])\n"
     ]
    }
   ],
   "source": [
    "a=t.randn(3,4)\n",
    "print(a)\n",
    "print(a.shape)\n",
    "print(a[0])  #第一个维度(数为3)选取0，第二个维度(数为4)选取全部\n",
    "print(a[0,:])#同上\n",
    "print(a[:,0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[-0.9328, -0.5227,  1.3218, -1.2532],\n",
      "        [-1.5375, -1.4575, -1.2773,  0.1476]])\n",
      "tensor([[-0.9328, -0.5227],\n",
      "        [-1.5375, -1.4575]])\n"
     ]
    }
   ],
   "source": [
    "print(a[:2])  #前两行\n",
    "print(a[:2,0:2]) #前两行，前两列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[False, False,  True, False],\n",
       "        [False, False, False, False],\n",
       "        [False, False, False,  True]])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a > 1 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([1.3218, 1.6920])\n",
      "tensor([1.3218, 1.6920])\n"
     ]
    }
   ],
   "source": [
    "b=a[a>1] #挑选出所有大于1的，等价于a.masked_select(a>1)\n",
    "print(b)\n",
    "print(a.masked_select(a>1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-0.9328, -0.5227,  1.3218, -1.2532],\n",
       "        [-1.5375, -1.4575, -1.2773,  0.1476]])"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a[t.LongTensor([0,1])] #第0行和第1行"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**其他常用选择函数**\n",
    " \n",
    " |函数|功能|\n",
    " |-----|----|\n",
    " |index_select(input,dim,index)|在指定dim上选取某些行和列|\n",
    " |masked_select(input,mask)|同a[a>0]，使用ByteTensor选取|\n",
    " |non_zero(input)|非零元素的下标|\n",
    " |gather(input,dim,index)|根据index，在dim维度上选取数据，输出的size与index一样|\n",
    "    \n",
    "**gather()的具体示例如下：**\n",
    "1. 取对角线元素"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[0, 1, 2, 3]]) \n",
      "\n",
      "tensor([[ 0,  1,  2,  3],\n",
      "        [ 4,  5,  6,  7],\n",
      "        [ 8,  9, 10, 11],\n",
      "        [12, 13, 14, 15]]) \n",
      "\n",
      "tensor([[ 0,  5, 10, 15]])\n",
      "tensor([[0],\n",
      "        [1],\n",
      "        [2],\n",
      "        [3]]) \n",
      "\n",
      "tensor([[ 0],\n",
      "        [ 5],\n",
      "        [10],\n",
      "        [15]])\n"
     ]
    }
   ],
   "source": [
    "index=t.LongTensor([[0,1,2,3]])\n",
    "print(index,'\\n') #第一个维度的数为1\n",
    "a=t.arange(0,16).view(4,4)\n",
    "print(a,'\\n')\n",
    "print(a.gather(0,index))\n",
    "'''\n",
    "0表示对第一个维度操作，然后按index的顺序依次取\n",
    "即按行操作：第一行，第二行，第三行。。。，每一行按照index的顺序取\n",
    "'''\n",
    "index=t.LongTensor([[0,1,2,3]]).t()\n",
    "print(index,'\\n') #第二个维度的数为1 ，即4*1\n",
    "print(a.gather(1,index)) # 在第二个维度选取数据，依次取0号，1号，2,号。。。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "2. 取反对角线元素"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[12,  9,  6,  3]])\n",
      "tensor([[ 3],\n",
      "        [ 6],\n",
      "        [ 9],\n",
      "        [12]])\n"
     ]
    }
   ],
   "source": [
    "index=t.LongTensor([[3,2,1,0]])\n",
    "# print(index,'\\n') #第二个维度的数为1 ，即4*1\n",
    "print(a.gather(0,index))\n",
    "index=index.t()\n",
    "print(a.gather(1,index))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "3. 取两个对角线上元素"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[ 0,  5, 10, 15],\n",
      "        [12,  9,  6,  3]])\n",
      "tensor([[ 0,  3],\n",
      "        [ 5,  6],\n",
      "        [10,  9],\n",
      "        [15, 12]])\n"
     ]
    }
   ],
   "source": [
    "index=t.LongTensor([[0,1,2,3],[3,2,1,0]])\n",
    "print(a.gather(0,index))\n",
    "index=index.t()\n",
    "b=a.gather(1,index)\n",
    "print(b)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "与gather相应的逆操作是scatter_，sactter_把取出来的数据再放回去，<front color=red>注意scatter_是inplace操作</front>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.,  0.,  0.,  3.],\n",
       "        [ 0.,  5.,  6.,  0.],\n",
       "        [ 0.,  9., 10.,  0.],\n",
       "        [12.,  0.,  0., 15.]])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c=t.zeros(4,4)\n",
    "c.scatter_(1,index,b.float())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "上面这行代码如果按照原书中的写法会报错，报错如下：\n",
    "\n",
    "\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\\-\n",
    "\n",
    "RuntimeError&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;Traceback (most recent call last)\n",
    "\n",
    "< ipython-input-26-8c806181a3e0 > in < module >\n",
    "\n",
    "&emsp;&emsp;&emsp;1 c=t.zeros(4,4)\n",
    "\n",
    "\\-\\-\\-\\-\\-> 2 c.scatter_(1,index,b)\n",
    "\n",
    "RuntimeError: Expected object of scalar type Float but got scalar type Long for argument #4 'src' in call to _th_scatter_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 高级索引\n",
    "略\n",
    "## Tensor类型\n",
    "默认的tensor为FloatTensor，可以通过t.set_default_tensor_type修改默认类型\n",
    "各种类型之间可以相互转换，type(new_type)是通用的做法\n",
    "CPU tensor和GPU tensor之间的互相转换通过tensor.cuda和tensor.cpu实现\n",
    "同时Tensor还有一个new方法，用法与t.Tensor()一样"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[0., 0., 0.],\n",
      "        [0., 0., 0.]])\n",
      "tensor([[0., 0., 0.],\n",
      "        [0., 0., 0.]], dtype=torch.float32)\n",
      "tensor([[0., 0., 0.],\n",
      "        [0., 0., 0.]], dtype=torch.float32)\n"
     ]
    }
   ],
   "source": [
    "t.set_default_tensor_type('torch.DoubleTensor')\n",
    "a=t.Tensor(2,3)\n",
    "print(a)\n",
    "b=a.float()\n",
    "print(b)\n",
    "c=a.type_as(b)\n",
    "print(c)\n",
    "t.set_default_tensor_type('torch.FloatTensor')  #还原为默认"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "随着版本更新，原书代码会报错：\n",
    "TypeError: only floating-point types are supported as the default type\n",
    "因为Int型现在不支持设置为default，只能设置float类型的值为默认类型(float，double和half)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 逐元素操作\n",
    "这部分操作会对tensor的每一个元素进行操作\n",
    "\n",
    "![](https://s2.ax1x.com/2020/01/29/1QAD0A.png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[0, 1, 2],\n",
      "        [3, 4, 5]]) \n",
      "\n",
      "tensor([[3, 3, 3],\n",
      "        [3, 4, 5]])\n"
     ]
    }
   ],
   "source": [
    "a=t.arange(0,6).view(2,3)\n",
    "print(a,'\\n')\n",
    "print(t.clamp(a,min=3))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 归并操作\n",
    "![](https://s2.ax1x.com/2020/01/29/1QEJBj.png)\n",
    "以上函数大多都有参数dim，用来指定在哪一个维度上进行操作\n",
    "假设输入的形状为(m,n,k):\n",
    "- dim=0，输出形状(1,n,k)或(n,k)\n",
    "- dim=1，输出形状(m,1,k)或(m,k)\n",
    "- dim=2，输出形状(m,n,1)或(m,n)\n",
    "\n",
    "size中是否具有1，取决于参数keepdim，keepdim=True就会保留维度1（pytorch 0.2.0 起keepdim默认为False）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[2., 2., 2.]])\n",
      "tensor([2., 2., 2.])\n",
      "tensor([3., 3.])\n"
     ]
    }
   ],
   "source": [
    "b=t.ones(2,3)\n",
    "print(b.sum(dim=0,keepdim=True))\n",
    "print(b.sum(0)) #注意区别\n",
    "print(b.sum(dim=1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 比较\n",
    "比较操作中有一部分是逐元素比较，有一部分类似于归并操作\n",
    "![](https://s2.ax1x.com/2020/01/30/1lMHJJ.png)\n",
    "\n",
    "- t.max(tensor)：返回tensor中最大的一个数\n",
    "- t.max(tensor,dim)：指定维度上最大的数，返回tensor及下标\n",
    "- t.max(tensor1,tensor2)：比较两个tensor中比较大的元素"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[ 0.,  4.,  8.],\n",
      "        [12., 16., 20.]]) \n",
      "\n",
      "torch.return_types.max(\n",
      "values=tensor([12., 16., 20.]),\n",
      "indices=tensor([1, 1, 1]))\n"
     ]
    }
   ],
   "source": [
    "a=t.linspace(0,20,6).view(2,3)\n",
    "print(a,'\\n')\n",
    "print(t.max(a,0)) # 每一列上最大的数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[ 2.0000,  4.6000,  7.2000],\n",
      "        [ 9.8000, 12.4000, 15.0000]]) \n",
      "\n",
      "tensor([[ 2.0000,  4.6000,  8.0000],\n",
      "        [12.0000, 16.0000, 20.0000]])\n"
     ]
    }
   ],
   "source": [
    "b=t.linspace(2,15,6).view(2,3)\n",
    "print(b,'\\n')\n",
    "print(t.max(a,b)) #生成一个新的tensor的元素是原来两个逐元素相比之后较大的那一个"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([12., 16., 20.])"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c=t.linspace(20,0,6).view(2,3)\n",
    "a[a>c]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 线性代数\n",
    "![](https://s2.ax1x.com/2020/01/30/1llUDP.png)\n",
    "代码略\n",
    "**需要注意的是**，矩阵的转置会导致存储空间的不连续，需要调用.contiguous方法将其转为连续\n",
    "## Tensor和Numpy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "ename": "IndentationError",
     "evalue": "unexpected indent (<ipython-input-27-148e75b6853c>, line 4)",
     "output_type": "error",
     "traceback": [
      "\u001b[1;36m  File \u001b[1;32m\"<ipython-input-27-148e75b6853c>\"\u001b[1;36m, line \u001b[1;32m4\u001b[0m\n\u001b[1;33m    '''\u001b[0m\n\u001b[1;37m    ^\u001b[0m\n\u001b[1;31mIndentationError\u001b[0m\u001b[1;31m:\u001b[0m unexpected indent\n"
     ]
    }
   ],
   "source": [
    "a=t.ones(3,2)\n",
    "b=t.zeros(2,3,1)\n",
    "a+b\n",
    "    '''\n",
    "    以上代码是一个自动广播的过程\n",
    "    1. 因为a比b少1维，所以在a前面补1,形状变为(1,3,2)。等价于a.unsqueeze(0)。b的形状为(2,3,1)\n",
    "    2.a和b的第一和三维形状不一样，进行扩展，将他们的形状都变成(2,3,2)\n",
    "    '''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 手动广播过程\n",
    "a.unsqueeze(0).expand(2,3,2)+b.expand(2,3,2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "unsqueeze或view ： 为数据某一维的形状补1\n",
    "\n",
    "expand或expand_as :重复数组，该操作不会复制数组，所以不会占用额外空间\n",
    "\n",
    "repeat实现与expand相类似的功能，不使用它的原因是repeat会把相同数据复制多份，因此会占用额外的空间"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# numpy操作\n",
    "import numpy as np\n",
    "a=np.ones([2,3],dtype=np.float32)\n",
    "print(a)\n",
    "b=t.from_numpy(a) #numpy->tensor\n",
    "print(b)\n",
    "c=b.numpy()\n",
    "print(c) #a,b,c三个对象共享内存空间"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 内部结构\n",
    "tensor分为信息区（Tensor）和存储区（Storage），信息区主要保存着tensor的形状（size）、步长（stride）、数据类型（type）等信息，而真正的数据则保存成连续数组。**由于数据动辄成千上万，因此信息区元素占用内存较少，主要内存占用则取决于tensor中元素的数目，也即存储区的大小**\n",
    "\n",
    "绝大多数操作并不修改tensor的数据，而只是修改了tensor的头信息。这种做法更节省内存，同时提升了处理速度。在使用中需要注意。\n",
    "\n",
    "此外有些操作会导致tensor不连续，这时需调用tensor.contiguous方法将它们变成连续的数据，该方法会使数据复制一份，不再与原来的数据共享storage。\n",
    "\n",
    "高级索引一般不共享stroage，而普通索引共享storage（普通索引可以通过只修改tensor的offset，stride和size，而不修改storage来实现）。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a=t.arange(0,6)\n",
    "b=a.view(2,3)\n",
    "print(id(b.storage())==id(a.storage())) #a和b的内存地址一样，即是同一个storage"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 其他有关Tensor的话题\n",
    "### GPU/CPU\n",
    "tensor可以很随意的在gpu/cpu上传输。使用tensor.cuda(device_id)或者tensor.cpu()。另外一个更通用的方法是tensor.to(device)\n",
    "#### 注意\n",
    "- 尽量使用tensor.to(device), 将device设为一个可配置的参数，这样可以很轻松的使程序同时兼容GPU和CPU\n",
    "- 数据在GPU之中传输的速度要远快于内存(CPU)到显存(GPU), 所以尽量避免频繁的在内存和显存中传输数据。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a=t.randn(3,4)\n",
    "a.device"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if t.cuda.is_available():\n",
    "    a = t.randn(3,4, device=t.device('cuda:1'))\n",
    "    # 等价于\n",
    "    # a.t.randn(3,4).cuda(1)\n",
    "    # 但是前者更快\n",
    "    a.device\n",
    "# 可惜我下的torch版本是不支持gpu的，回头改一下\n",
    "print(t.cuda.is_available())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 持久化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if t.cuda.is_available():\n",
    "    a=a.cuda(1)\n",
    "t.save(a,'a.pth')\n",
    "b=t.load('a.pth')\n",
    "print(b)\n",
    "'''\n",
    "if t.cuda.is_available():\n",
    "    a = a.cuda(1) # 把a转为GPU1上的tensor,\n",
    "    t.save(a,'a.pth')\n",
    "\n",
    "    # 加载为b, 存储于GPU1上(因为保存时tensor就在GPU1上)\n",
    "    b = t.load('a.pth')\n",
    "    # 加载为c, 存储于CPU\n",
    "    c = t.load('a.pth', map_location=lambda storage, loc: storage)\n",
    "    # 加载为d, 存储于GPU0上\n",
    "    d = t.load('a.pth', map_location={'cuda:1':'cuda:0'})\n",
    "# 没有GPU版本，暂不运行\n",
    "'''"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 向量化\n",
    "向量化的内容之前有在吴恩达的深度学习视频中讲过，是一种高效计算方式，我们知道python要尽可能避免for循环，尽量使用向量化的数值计算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def for_loop_add(x, y):\n",
    "    result = []\n",
    "    for i,j in zip(x, y):\n",
    "        result.append(i + j)\n",
    "    return t.Tensor(result)\n",
    "x = t.zeros(100)\n",
    "y = t.ones(100)\n",
    "%timeit -n 10 for_loop_add(x, y)\n",
    "%timeit -n 10 x + y"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "此外还有以下几点需要注意：\n",
    "\n",
    "- 大多数t.function都有一个参数out，这时候产生的结果将保存在out指定tensor之中。\n",
    "- t.set_num_threads可以设置PyTorch进行CPU多线程并行计算时候所占用的线程数，这个可以用来限制PyTorch所占用的CPU数目。\n",
    "- t.set_printoptions可以用来设置打印tensor时的数值精度和格式。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## pytorch实现线性回归\n",
    "\n",
    "线性回归表达形式为$y = wx+b+e$，$e$为误差服从均值为0的正态分布。首先让我们来确认线性回归的损失函数：\n",
    "$$\n",
    "loss = \\sum_i^N \\frac 1 2 ({y_i-(wx_i+b)})^2\n",
    "$$\n",
    "然后利用**随机梯度下降法**更新参数$\\textbf{w}$和$\\textbf{b}$来最小化损失函数，最终学得$\\textbf{w}$和$\\textbf{b}$的数值。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# pre\n",
    "import torch as t\n",
    "from matplotlib import pyplot as plt\n",
    "from IPython import display\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> %matplotlib inline\n",
    "\n",
    "是一个魔法函数（Magic Functions）。官方给出的定义是：IPython有一组预先定义好的所谓的魔法函数（Magic Functions），你可以通过命令行的语法形式来访问它们。可见“%matplotlib inline”就是模仿命令行来访问magic函数的在IPython中独有的形式。\n",
    "**注意：既然是IPython的内置magic函数，那么在Pycharm中是不会支持的。**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 生成伪数据\n",
    "t.manual_seed(1000) \n",
    "def get_fake_data(batch_size=8):\n",
    "    x=t.rand(batch_size,1)*5\n",
    "    y=2*x+3+t.randn(batch_size,1)\n",
    "    return x,y\n",
    "x,y=get_fake_data(batch_size=16)\n",
    "plt.scatter(x.squeeze(),y.squeeze())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 初始化参数\n",
    "w=t.rand(1,1)\n",
    "b=t.rand(1,1)\n",
    "lr=0.02 # 学习率\n",
    "for ii in range(500):\n",
    "    x, y = get_fake_data(batch_size=8)\n",
    "\n",
    "    #forward\n",
    "    y_pred=x.mm(w)+b.expand_as(x)\n",
    "    loss=0.5*(y-y_pred)**2\n",
    "    loss=loss.mean()\n",
    "\n",
    "    #backward\n",
    "    dloss=1\n",
    "    dy_pred=dloss*(y_pred-y)\n",
    "\n",
    "    dw = x.t().mm(dy_pred) #用了链式法则，所有深度学习书籍上面最基础的一个推导\n",
    "    db = dy_pred.sum()\n",
    "\n",
    "    # 更新参数\n",
    "    w.sub_(lr * dw) # 左式等价于w=w-lr*dw\n",
    "    b.sub_(lr * db)\n",
    "    \n",
    "    if ii%50 ==0:\n",
    "       \n",
    "        # 画图\n",
    "        display.clear_output(wait=True)\n",
    "        x = t.arange(0, 6).view(-1, 1)\n",
    "        y = x.float().mm(w) + b.expand_as(x)\n",
    "        plt.plot(x.cpu().numpy(), y.cpu().numpy()) # predicted\n",
    "        \n",
    "        x2, y2 = get_fake_data(batch_size=32) \n",
    "        plt.scatter(x2.numpy(), y2.numpy()) # true data\n",
    "        \n",
    "        plt.xlim(0, 5)\n",
    "        plt.ylim(0, 13)\n",
    "        plt.show()\n",
    "        plt.pause(0.5)\n",
    "        \n",
    "print('w: ', w.item(), 'b: ', b.item())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
